This is an analysis of differential expression at the transcript level between shedding rate groupings based on average shedding rates for infected blue-winged teal ileum samples.
## Load packages and data
library(limma)
library(edgeR)
library(Glimma)
library(gplots)
library(ggrepel)
library(RColorBrewer)
library(clusterProfiler)
library(gridExtra)
library(kableExtra)
library(topGO)
library(tidyverse)
## Load data
annot <- read.delim("G:/Shared drives/RNA Seq Supershedder Project/BWTE DE manuscript/extData/Trinotate.csv", header = TRUE, sep = "\t")
cnt <- read.table("G:/Shared drives/RNA Seq Supershedder Project/BWTE DE manuscript/extData/rsem.isoform.counts.matrix", header = TRUE)
covars <- read.csv("G:/Shared drives/RNA Seq Supershedder Project/BWTE DE manuscript/extData/BWTE54_SSgroup_Raw_Pool.csv", header = TRUE)
Bird 36 is missing a measurement for the weight covariate and bird 19 had improper tissue-based grouping, suggesting a potential sample mix up. They are both removed from the analysis. We also rename the transcripts, dropping the “Trinity_” prefix.
cnt <- cnt %>%
select(
-alignEstimateAbundance_BWTE_Ileum_36_S50,
-alignEstimateAbundance_BWTE_Bursa_36_S31,-alignEstimateAbundance_BWTE_Ileum_19_S35,
-alignEstimateAbundance_BWTE_Bursa_19_S14) %>%
rownames_to_column("transcript") %>%
separate(transcript, into = c(NA, "pt1", "pt2", "gene", "isoform")) %>%
unite(transcript, pt1, pt2, gene, isoform) %>%
column_to_rownames("transcript") %>%
select(contains("Ileum"))
covars <- covars %>%
filter(!bird %in% c("36", "19")) %>%
arrange(bird) %>%
mutate(group = str_remove(group, "-"))
annot <- annot %>%
separate(transcript_id, into = c(NA, "pt1", "pt2", "gene", "isoform")) %>%
unite(transcript_id, pt1, pt2, gene, isoform)
bird <- as.factor(covars$bird)
sex <- as.factor(covars$sex)
age <- as.numeric(covars$age)
weight <- covars$wt_55
group <- recode(covars$group, C1 = "Ctl", C14 = "Ctl") %>%
as.factor()
pool <- as.factor(covars$Pool.Ileum)
ssgroup.virus.avg <- as.factor(covars$SSgroup.virus.avg)
covars.tib <- data.frame(bird,
sex,
age,
weight,
group,
pool,
ssgroup.virus.avg) %>%
as_tibble()
Here we standardize expression levels across samples of varying depth by using the counts per million (CPM) and trimmed mean of M-values (TMM) methods. We also remove any transcripts that are expressed at few than 0.5 CPM in at least 25% of individuals.
#Convert to DGEList object
dge <- DGEList(counts=cnt)
dge$genes <- annot[match(rownames(dge$counts), annot$transcript_id),]
#CPM and log-CPM
cpm <- cpm(dge)
lcpm <- cpm(dge, log = TRUE)
#### Retain only transcripts expressed at >0.5 CPM in at least 25% of individuals
table(rowSums(dge$counts==0) == length(cnt[1,]))
##
## FALSE TRUE
## 484888 86217
keep.exprs <- rowSums(cpm>0.5) >= length(cnt[1,])/4
sum(keep.exprs)
## [1] 66980
dge <- dge[keep.exprs,, keep.lib.sizes=FALSE]
table(rowSums(dge$counts==0) == length(cnt[1,]))
##
## FALSE
## 66980
dim(dge)
## [1] 66980 52
#TMM normalization
dge <- calcNormFactors(dge, method = "TMM")
Following normalization, we subset the data based on infection groups.
#Assign groups in DGElist
dge$samples$group <- covars.tib$group
### Divide out I5
dge.I5 <- dge[,grep('I5', dge$samples$group)]
covars.I5 <- covars.tib %>% filter(group == "I5")
## Establish design matrix: I5
design.I5 = model.matrix(~ 0 +
factor(covars.I5$ssgroup.virus.avg) +
covars.I5$age +
covars.I5$sex +
covars.I5$weight +
covars.I5$pool)
colnames(design.I5) <- c("HIGH", "LOW", "MODERATE",
"age", "sexM", "weight", "pool3")
contr.matrix.I5 = makeContrasts(
LvM = LOW - MODERATE,
MvH = MODERATE - HIGH,
LvH = LOW - HIGH,
levels = colnames(design.I5))
## Mean-variance trend plots
v.I5 <- voomWithQualityWeights(dge.I5, design.I5, plot = FALSE)
#Fitting models
vfit.I5 <- lmFit(v.I5, design.I5)
vfit.I5 <- contrasts.fit(vfit.I5, contrasts = contr.matrix.I5)
tfit.I5 <- treat(vfit.I5, lfc = 1.0)
## Identify DE genes
dt.I5 <- decideTests(tfit.I5, p.value = 0.1, adjust.method = "fdr")
dt.tib.I5 <- as_tibble(dt.I5, rownames = NA) %>%
rownames_to_column("transcript") %>%
mutate_at(vars(starts_with("L")), as.numeric) %>%
mutate_at(vars(starts_with("M")), as.numeric) %>%
rename(LvM.I5 = LvM,
MvH.I5 = MvH,
LvH.I5 = LvH)
dt.tib <- dt.tib.I5 %>%
mutate_at(vars(starts_with("L")), as.numeric) %>%
mutate_at(vars(starts_with("M")), as.numeric) %>%
filter_at(vars(LvM.I5:LvH.I5), any_vars(. != 0))
allResults <- as.data.frame(summary(dt.I5)) %>%
filter(Var1 != "NotSig")
kable(allResults) %>%
kable_styling("striped", full_width = F)
| Var1 | Var2 | Freq |
|---|---|---|
| Down | LvM | 0 |
| Up | LvM | 0 |
| Down | MvH | 1 |
| Up | MvH | 38 |
| Down | LvH | 1 |
| Up | LvH | 102 |
tmp1 <- topTreat(tfit.I5, coef = 1, n = Inf)
results1 <- mutate(tmp1, sig=ifelse(tmp1$adj.P.Val<0.1, "Sig", "Not Sig"))
p1 <- ggplot(results1, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Low vs. Moderate shedding") +
ylab("-Log10(Adjusted p-value)") +
xlab("Log(Fold count)") +
theme_bw() +
theme(legend.position = "none")
tmp2 <- topTreat(tfit.I5, coef = 2, n = Inf)
results2 <- mutate(tmp2, sig=ifelse(tmp2$adj.P.Val<0.1, "Sig", "Not Sig"))
p2 <- ggplot(results2, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Moderate vs. High shedding") +
ylab("-Log10(Adjusted p-value)") +
xlab("Log(Fold count)") +
theme_bw() +
theme(legend.position = "none")
tmp3 <- topTreat(tfit.I5, coef = 3, n = Inf)
results3 <- mutate(tmp3, sig=ifelse(tmp3$adj.P.Val<0.1, "Sig", "Not Sig"))
p3 <- ggplot(results3, aes(logFC, -log10(P.Value))) +
geom_point(aes(col = sig)) +
scale_color_manual(values=c("black", "red")) +
ggtitle("Low vs. High shedding") +
ylab("-Log10(Adjusted p-value)") +
xlab("Log(Fold count)") +
theme_bw() +
theme(legend.position = "none")
grid.arrange(p1, p2, p3, nrow = 1)
# Get the gene names for DE genes
lcpm2 <- as_tibble(dge.I5$counts) %>%
cpm(., log = TRUE)
newNames <- colnames(lcpm2) %>%
as_tibble() %>%
separate(value, into = c(NA, NA, NA, "sample", NA)) %>%
bind_cols(covars.I5) %>%
unite("newNames", ssgroup.virus.avg, group, bird)
colOrder <- bind_cols(covars.I5) %>%
mutate(ssgroup.virus.avg = fct_relevel(ssgroup.virus.avg, "LOW", "MODERATE", "HIGH")) %>%
arrange(ssgroup.virus.avg, group, bird) %>%
unite("newNames", ssgroup.virus.avg, group, bird)
colnames(lcpm2) <- newNames$newNames
lcpm2 <- lcpm2[, colOrder$newNames]
heatmap.annot <- annot %>%
select(
transcript_id,
sprot_Top_BLASTX_hit) %>%
filter(transcript_id %in% dt.tib$transcript) %>%
separate(sprot_Top_BLASTX_hit, into = c("sprot1", NA, NA, NA, NA, "sprot2", NA), "\\^") %>%
separate(sprot2, sep = "=", into = c(NA, "SwissProt_GeneFunction")) %>%
separate(sprot1, sep = "_", into = c("SwissProt_GeneName", NA)) %>%
distinct(transcript_id, SwissProt_GeneName) %>%
filter(SwissProt_GeneName != ".")
deGeneCounts <- lcpm2 %>%
as_tibble() %>%
mutate(transcript = rownames(dge$counts)) %>%
filter(transcript %in% dt.tib$transcript) %>%
rename(transcript_id = transcript) %>%
left_join(heatmap.annot) %>%
mutate(SwissProt_GeneName = replace_na(SwissProt_GeneName, ".")) %>%
unite(transcript, transcript_id:SwissProt_GeneName, sep = " - ") %>%
column_to_rownames("transcript") %>%
as.matrix()
## Set up palette
mypalette <- brewer.pal(11,"RdYlBu")
morecols <- colorRampPalette(mypalette)
hc <- hclust(as.dist(1-cor(t(deGeneCounts))))
# Plot the heatmap
heatmap.2(deGeneCounts,
Colv = FALSE,
Rowv = as.dendrogram(hc),
col = rev(morecols(50)),
trace = "none",
colsep = c(5, 10),
dendrogram = "row",
density.info = "none",
key = TRUE,
#main = "Ileum - Gene level",
margins = c(10, 10),
scale ="row")
lcpm.DE <- lcpm %>%
as_tibble(rownames = NA) %>%
rownames_to_column("identifier") %>%
filter(identifier %in% dt.tib$transcript) %>%
pivot_longer(cols = contains("_"),
names_to = "sample",
values_to = "lcpm") %>%
separate(sample, into = c(NA, NA, "tissue", "bird", NA)) %>%
mutate(bird = as.integer(bird)) %>%
left_join(covars, by = "bird") %>%
filter(group == "C1" | group == "C14" | group == "I5") %>%
select(identifier, bird, lcpm, SSgroup.virus.avg, group) %>%
mutate(group = recode(group, C14 = "Control"),
group = recode(group, C1 = "Control"),
tmpID = ifelse(group == "Control", 'Control', 'Infected'))
aprioriPlotting <- function(target, ...) {
annot.target <- annot %>%
select(
transcript_id,
sprot_Top_BLASTX_hit) %>%
filter(transcript_id == target) %>%
separate(sprot_Top_BLASTX_hit, into = c("sprot1", NA, NA, NA, NA, NA, NA), "\\^") %>%
separate(sprot1, sep = "_", into = c("SwissProt_GeneName", NA))
plot <- lcpm.DE %>%
filter(identifier == target) %>%
mutate(SSgroup.virus.avg = fct_relevel(SSgroup.virus.avg, "LOW", "MODERATE", "HIGH")) %>%
ggplot(aes(x = SSgroup.virus.avg, y = lcpm, fill = factor(group))) +
facet_grid(. ~ tmpID, scales = "free", space = "free") +
ylab("Log2(Counts per million)") +
xlab("Shedding Group") +
scale_fill_grey(start = 0.35, end = 1) +
geom_point(position = position_dodge(width=0.75), aes(group = group), show.legend = FALSE) +
geom_boxplot(alpha = 0.5) +
geom_label_repel(aes(label = bird, group = group, fill = NULL), position = position_dodge(width=0.75), show.legend=FALSE) +
theme_classic() +
labs(title= paste0(target, " - ", annot.target[1,2])) +
theme(legend.title = element_blank())
print(plot)
}
for(trans in sort(unique(lcpm.DE$identifier))) {
aprioriPlotting(trans)
}
annot %>%
select(
transcript_id,
sprot_Top_BLASTX_hit
) %>%
rename(transcript = transcript_id) %>%
inner_join(dt.tib) %>%
separate(sprot_Top_BLASTX_hit, into = c("sprot1", NA, NA, NA, NA, "sprot2", NA), "\\^") %>%
separate(sprot2, sep = "=", into = c(NA, "SwissProt_GeneFunction")) %>%
separate(sprot1, sep = "_", into = c("SwissProt_GeneName", NA)) %>%
mutate_at(.vars = vars(LvM.I5:LvH.I5),
.funs = ~ ifelse(. == 1, "up", .)) %>%
mutate_at(.vars = vars(LvM.I5:LvH.I5),
.funs = ~ ifelse(. == -1, "down", .)) %>%
filter_at(vars(LvM.I5:LvH.I5), any_vars(. != 0)) %>%
distinct(transcript, .keep_all = TRUE) %>%
select(-SwissProt_GeneFunction) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| transcript | SwissProt_GeneName | LvM.I5 | MvH.I5 | LvH.I5 |
|---|---|---|---|---|
| DN89_c3_g1_i2 | MEIS1 | 0 | up | up |
| DN11629_c0_g1_i9 | . | 0 | 0 | up |
| DN10879_c1_g1_i8 | RLF | 0 | 0 | up |
| DN2043_c0_g1_i3 | KBRS2 | 0 | 0 | up |
| DN2036_c0_g1_i4 | MBD2 | 0 | 0 | up |
| DN430_c0_g1_i5 | GLT12 | 0 | 0 | up |
| DN7842_c0_g1_i8 | ALG13 | 0 | up | up |
| DN1146_c0_g1_i3 | MD12L | 0 | 0 | up |
| DN1128_c0_g1_i5 | RCC1 | 0 | up | up |
| DN6928_c0_g1_i5 | NAIF1 | 0 | 0 | up |
| DN6089_c0_g2_i9 | . | 0 | up | up |
| DN6099_c0_g1_i3 | SC22A | 0 | up | up |
| DN15503_c0_g1_i2 | POL | 0 | up | up |
| DN15518_c0_g1_i8 | SCNNB | 0 | up | up |
| DN885_c0_g1_i1 | BUD31 | 0 | 0 | up |
| DN5108_c0_g1_i1 | TBC13 | 0 | 0 | up |
| DN5157_c1_g1_i10 | . | 0 | 0 | up |
| DN4235_c0_g1_i1 | SIM13 | 0 | up | up |
| DN4247_c0_g1_i13 | DPOG2 | 0 | up | up |
| DN5456_c0_g1_i2 | C560 | 0 | up | up |
| DN3676_c0_g1_i10 | CNOT6 | 0 | 0 | up |
| DN2785_c0_g1_i2 | UBA5 | 0 | 0 | up |
| DN8539_c0_g1_i1 | CHAC1 | 0 | 0 | up |
| DN7628_c0_g1_i2 | DIEXF | 0 | up | up |
| DN207_c1_g1_i14 | IQEC1 | 0 | 0 | up |
| DN4008_c0_g1_i8 | PTPRK | 0 | 0 | up |
| DN663_c0_g1_i4 | AGRG5 | 0 | 0 | up |
| DN682_c0_g3_i4 | RRAGA | 0 | 0 | up |
| DN613_c2_g1_i7 | MRVI1 | 0 | 0 | up |
| DN603_c0_g1_i9 | TERT | 0 | 0 | up |
| DN3137_c0_g1_i9 | S2536 | 0 | 0 | up |
| DN7924_c0_g1_i6 | CAMP1 | 0 | up | up |
| DN18178_c0_g1_i3 | PMGE | 0 | 0 | up |
| DN10078_c1_g1_i12 | PARN | 0 | 0 | up |
| DN16556_c0_g1_i2 | BICRL | 0 | 0 | up |
| DN5255_c0_g2_i6 | TMCC1 | 0 | 0 | up |
| DN4309_c0_g1_i7 | PCSK6 | 0 | 0 | up |
| DN2519_c0_g1_i4 | . | 0 | 0 | up |
| DN12268_c0_g1_i8 | KYNU | 0 | up | up |
| DN10663_c0_g1_i2 | SIR5 | 0 | up | up |
| DN2832_c0_g1_i12 | . | 0 | 0 | up |
| DN1982_c0_g1_i3 | . | 0 | 0 | up |
| DN1956_c0_g1_i7 | LPIN1 | 0 | 0 | up |
| DN25185_c0_g1_i5 | . | 0 | 0 | up |
| DN6804_c0_g1_i7 | . | 0 | 0 | up |
| DN6804_c0_g1_i1 | DKC1 | 0 | 0 | up |
| DN6850_c1_g1_i3 | ELMO2 | 0 | up | up |
| DN22798_c0_g1_i8 | SNX17 | 0 | 0 | up |
| DN9924_c0_g2_i7 | . | 0 | up | up |
| DN2376_c1_g1_i6 | AT8B2 | 0 | up | up |
| DN2387_c1_g1_i3 | . | 0 | up | up |
| DN2333_c0_g1_i2 | F169A | 0 | 0 | up |
| DN2333_c1_g1_i1 | F135A | 0 | up | up |
| DN2397_c1_g1_i4 | IRAK4 | 0 | up | 0 |
| DN1407_c5_g1_i2 | . | 0 | down | 0 |
| DN1433_c0_g1_i11 | ARI1B | 0 | 0 | up |
| DN1449_c0_g1_i12 | CBX1 | 0 | 0 | up |
| DN6364_c0_g1_i4 | PLGT2 | 0 | up | up |
| DN10398_c0_g1_i3 | RHG29 | 0 | 0 | up |
| DN3594_c0_g1_i2 | . | 0 | up | up |
| DN6658_c2_g1_i10 | . | 0 | 0 | up |
| DN6616_c2_g1_i17 | CCN2 | 0 | 0 | up |
| DN5702_c0_g1_i6 | LMBL3 | 0 | 0 | up |
| DN5796_c0_g1_i8 | . | 0 | 0 | up |
| DN143_c2_g1_i2 | . | 0 | 0 | up |
| DN133_c1_g1_i1 | LRWD1 | 0 | up | up |
| DN154_c3_g1_i4 | . | 0 | 0 | up |
| DN21574_c0_g1_i1 | . | 0 | up | up |
| DN21599_c0_g1_i6 | MNS1 | 0 | up | up |
| DN11783_c0_g3_i1 | . | 0 | 0 | up |
| DN9768_c0_g1_i7 | CCNE2 | 0 | 0 | up |
| DN9743_c0_g1_i21 | CXXC1 | 0 | 0 | up |
| DN3085_c1_g1_i6 | . | 0 | 0 | up |
| DN3018_c0_g1_i18 | KHK | 0 | up | up |
| DN3084_c0_g1_i13 | . | 0 | 0 | up |
| DN3044_c1_g1_i4 | ANR26 | 0 | up | up |
| DN582_c0_g1_i1 | RAD21 | 0 | up | up |
| DN2129_c0_g1_i12 | OGFD2 | 0 | 0 | up |
| DN10956_c0_g1_i1 | MRC1 | 0 | 0 | up |
| DN1264_c0_g1_i2 | REEP2 | 0 | 0 | up |
| DN1230_c0_g1_i13 | CFLAR | 0 | 0 | up |
| DN6130_c0_g1_i8 | GCN1 | 0 | up | up |
| DN3358_c1_g1_i14 | TRPS1 | 0 | 0 | up |
| DN3362_c0_g1_i6 | . | 0 | up | 0 |
| DN2447_c4_g1_i3 | . | 0 | 0 | up |
| DN2431_c0_g1_i15 | . | 0 | 0 | down |
| DN7377_c0_g1_i8 | ATAD3 | 0 | 0 | up |
| DN6423_c0_g1_i4 | IFT74 | 0 | 0 | up |
| DN10599_c0_g1_i1 | PEDF | 0 | up | up |
| DN1083_c2_g1_i11 | . | 0 | 0 | up |
| DN1044_c0_g1_i3 | ZN318 | 0 | up | up |
| DN327_c0_g1_i6 | MSD2 | 0 | 0 | up |
| DN308_c0_g1_i5 | ZCHC2 | 0 | 0 | up |
| DN5860_c0_g1_i2 | PACR | 0 | up | up |
| DN4911_c0_g1_i8 | SETMR | 0 | up | 0 |
| DN5091_c0_g2_i6 | ATL2 | 0 | up | up |
| DN751_c2_g3_i2 | . | 0 | 0 | up |
| DN745_c0_g1_i22 | SEPT7 | 0 | 0 | up |
| DN9837_c0_g1_i9 | HOP2 | 0 | 0 | up |
| DN2291_c0_g1_i9 | MCAT | 0 | 0 | up |
| DN2224_c0_g1_i1 | . | 0 | up | up |
| DN15912_c0_g1_i10 | . | 0 | 0 | up |
| DN32038_c0_g1_i1 | . | 0 | 0 | up |
| DN9334_c0_g1_i4 | CARM1 | 0 | up | up |
| DN8480_c0_g2_i2 | ARI5B | 0 | up | up |
| DN14071_c0_g1_i5 | . | 0 | 0 | up |
| DN130272_c0_g3_i2 | CPTP | 0 | 0 | up |
annot %>%
select(
transcript_id,
sprot_Top_BLASTX_hit,
Kegg) %>%
rename(transcript = transcript_id) %>%
inner_join(dt.tib) %>%
separate(sprot_Top_BLASTX_hit, into = c("sprot1", NA, NA, NA, NA, "sprot2", NA), "\\^") %>%
separate(sprot2, sep = "=", into = c(NA, "SwissProt_GeneFunction")) %>%
separate(sprot1, sep = "_", into = c("SwissProt_GeneName", NA)) %>%
separate(Kegg, into = c("KEGG_ID", "KO_ID"), sep = "\\`") %>%
distinct(transcript, SwissProt_GeneName, KEGG_ID, .keep_all = TRUE) %>%
select(transcript, SwissProt_GeneName, SwissProt_GeneFunction, KEGG_ID, KO_ID) %>%
filter(SwissProt_GeneName != ".") %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| transcript | SwissProt_GeneName | SwissProt_GeneFunction | KEGG_ID | KO_ID |
|---|---|---|---|---|
| DN89_c3_g1_i2 | MEIS1 | Homeobox protein Meis1; | KEGG:mmu:17268 | KO:K15613 |
| DN10879_c1_g1_i8 | RLF | Zinc finger protein Rlf; | KEGG:hsa:6018 | NA |
| DN2043_c0_g1_i3 | KBRS2 | NF-kappa-B inhibitor-interacting Ras-like protein 2; | KEGG:gga:420032 | KO:K17197 |
| DN2036_c0_g1_i4 | MBD2 | Methyl-CpG-binding domain protein 2 {ECO:0000305}; | KEGG:hsa:8932 | KO:K11590 |
| DN430_c0_g1_i5 | GLT12 | Polypeptide N-acetylgalactosaminyltransferase 12; | KEGG:hsa:79695 | KO:K00710 |
| DN7842_c0_g1_i8 | ALG13 | UDP-N-acetylglucosamine transferase subunit ALG13 homolog; | KEGG:rno:300284 | KO:K07432 |
| DN1146_c0_g1_i3 | MD12L | Mediator of RNA polymerase II transcription subunit 12-like protein; | KEGG:hsa:116931 | KO:K15162 |
| DN1128_c0_g1_i5 | RCC1 | Regulator of chromosome condensation; | KEGG:hsa:1104 | KO:K11493 |
| DN6928_c0_g1_i5 | NAIF1 | Nuclear apoptosis-inducing factor 1; | . | NA |
| DN6099_c0_g1_i3 | SC22A | Vesicle-trafficking protein SEC22a; | KEGG:hsa:26984 | KO:K08520 |
| DN15503_c0_g1_i2 | POL | Gag-Pol polyprotein; | KEGG:vg:1491877 | NA |
| DN15518_c0_g1_i8 | SCNNB | Amiloride-sensitive sodium channel subunit beta; | . | NA |
| DN885_c0_g1_i1 | BUD31 | Protein BUD31 homolog; | KEGG:rno:89819 | KO:K12873 |
| DN5108_c0_g1_i1 | TBC13 | TBC1 domain family member 13; | KEGG:hsa:54662 | NA |
| DN4235_c0_g1_i1 | SIM13 | Small integral membrane protein 13; | KEGG:hsa:221710 | NA |
| DN4247_c0_g1_i13 | DPOG2 | DNA polymerase subunit gamma-2, mitochondrial; | KEGG:bta:505152 | KO:K02333 |
| DN4247_c0_g1_i13 | DPOG2 | DNA polymerase subunit gamma-2, mitochondrial; | KEGG:hsa:11232 | KO:K02333 |
| DN5456_c0_g1_i2 | C560 | Succinate dehydrogenase cytochrome b560 subunit, mitochondrial; | KEGG:ssc:100524676 | KO:K00236 |
| DN3676_c0_g1_i10 | CNOT6 | CCR4-NOT transcription complex subunit 6; | KEGG:rno:287249 | KO:K12603 |
| DN3676_c0_g1_i10 | CNOT6 | CCR4-NOT transcription complex subunit 6; | KEGG:xla:100505426 | KO:K12603 |
| DN2785_c0_g1_i2 | UBA5 | Ubiquitin-like modifier-activating enzyme 5; | KEGG:gga:414879 | KO:K12164 |
| DN8539_c0_g1_i1 | CHAC1 | Glutathione-specific gamma-glutamylcyclotransferase 1 {ECO:0000303|PubMed:27913623}; | KEGG:hsa:79094 | KO:K07232 |
| DN7628_c0_g1_i2 | DIEXF | Digestive organ expansion factor homolog; | KEGG:gga:421384 | KO:K14774 |
| DN207_c1_g1_i14 | IQEC1 | IQ motif and SEC7 domain-containing protein 1; | KEGG:hsa:9922 | KO:K12495 |
| DN4008_c0_g1_i8 | PTPRK | Receptor-type tyrosine-protein phosphatase kappa; | KEGG:hsa:5796 | KO:K06776 |
| DN4008_c0_g1_i8 | PTPRK | Receptor-type tyrosine-protein phosphatase kappa; | KEGG:hsa:5797 | KO:K05693 |
| DN663_c0_g1_i4 | AGRG5 | Adhesion G-protein coupled receptor G5; | KEGG:mmu:382045 | KO:K08459 |
| DN682_c0_g3_i4 | RRAGA | Ras-related GTP-binding protein A {ECO:0000305}; | KEGG:rno:117044 | KO:K16185 |
| DN613_c2_g1_i7 | MRVI1 | Protein MRVI1; | KEGG:hsa:10335 | KO:K12337 |
| DN603_c0_g1_i9 | TERT | Telomerase reverse transcriptase; | KEGG:cfa:403412 | KO:K11126 |
| DN3137_c0_g1_i9 | S2536 | Solute carrier family 25 member 36; | KEGG:gga:424817 | KO:K15116 |
| DN7924_c0_g1_i6 | CAMP1 | Calmodulin-regulated spectrin-associated protein 1; | KEGG:hsa:157922 | KO:K17493 |
| DN18178_c0_g1_i3 | PMGE | Bisphosphoglycerate mutase; | KEGG:hsa:669 | KO:K01837 |
| DN10078_c1_g1_i12 | PARN | Poly(A)-specific ribonuclease PARN; | KEGG:hsa:5073 | KO:K01148 |
| DN16556_c0_g1_i2 | BICRL | BRD4-interacting chromatin-remodeling complex-associated protein-like {ECO:0000250|UniProtKB:Q6AI39}; | KEGG:mmu:210982 | NA |
| DN5255_c0_g2_i6 | TMCC1 | Transmembrane and coiled-coil domains protein 1 {ECO:0000250|UniProtKB:O94876}; | KEGG:mmu:330401 | NA |
| DN4309_c0_g1_i7 | PCSK6 | Proprotein convertase subtilisin/kexin type 6; | KEGG:hsa:5046 | KO:K08672 |
| DN12268_c0_g1_i8 | KYNU | Kynureninase {ECO:0000255|HAMAP-Rule:MF_03017}; | KEGG:hsa:8942 | KO:K01556 |
| DN10663_c0_g1_i2 | SIR5 | NAD-dependent protein deacylase sirtuin-5, mitochondrial {ECO:0000255|HAMAP-Rule:MF_03160}; | KEGG:gga:420834 | KO:K11415 |
| DN1956_c0_g1_i7 | LPIN1 | Phosphatidate phosphatase LPIN1; | KEGG:mmu:14245 | KO:K15728 |
| DN6804_c0_g1_i1 | DKC1 | H/ACA ribonucleoprotein complex subunit DKC1; | KEGG:gga:422196 | KO:K11131 |
| DN6850_c1_g1_i3 | ELMO2 | Engulfment and cell motility protein 2; | KEGG:hsa:63916 | KO:K18985 |
| DN6850_c1_g1_i3 | ELMO2 | Engulfment and cell motility protein 2; | KEGG:mmu:228875 | KO:K15280 |
| DN22798_c0_g1_i8 | SNX17 | Sorting nexin-17; | KEGG:dre:568263 | KO:K17929 |
| DN22798_c0_g1_i8 | SNX17 | Sorting nexin-17; | KEGG:mmu:266781 | KO:K17929 |
| DN2376_c1_g1_i6 | AT8B2 | Phospholipid-transporting ATPase ID; | KEGG:hsa:57198 | KO:K01530 |
| DN2333_c0_g1_i2 | F169A | Soluble lamin-associated protein of 75 kDa; | KEGG:hsa:26049 | NA |
| DN2333_c1_g1_i1 | F135A | Protein FAM135A; | KEGG:hsa:57579 | NA |
| DN2397_c1_g1_i4 | IRAK4 | Interleukin-1 receptor-associated kinase 4; | KEGG:hsa:51135 | KO:K04733 |
| DN2397_c1_g1_i4 | IRAK4 | Interleukin-1 receptor-associated kinase 4; | KEGG:hsa:83448 | KO:K06176 |
| DN2397_c1_g1_i4 | IRAK4 | Interleukin-1 receptor-associated kinase 4; | KEGG:mmu:266632 | KO:K04733 |
| DN1433_c0_g1_i11 | ARI1B | AT-rich interactive domain-containing protein 1B; | KEGG:hsa:57492 | KO:K11653 |
| DN1433_c0_g1_i11 | ARI1B | AT-rich interactive domain-containing protein 1B; | KEGG:mmu:239985 | KO:K11653 |
| DN1449_c0_g1_i12 | CBX1 | Chromobox protein homolog 1; | KEGG:mmu:12412 | KO:K11585 |
| DN6364_c0_g1_i4 | PLGT2 | Protein O-glucosyltransferase 2 {ECO:0000305|PubMed:30127001}; | KEGG:hsa:79070 | NA |
| DN10398_c0_g1_i3 | RHG29 | Rho GTPase-activating protein 29; | KEGG:hsa:9411 | KO:K20644 |
| DN10398_c0_g1_i3 | RHG29 | Rho GTPase-activating protein 29; | KEGG:dre:378998 | KO:K20644 |
| DN6616_c2_g1_i17 | CCN2 | CCN family member 2; | KEGG:mmu:14219 | KO:K06827 |
| DN5702_c0_g1_i6 | LMBL3 | Lethal(3)malignant brain tumor-like protein 3; | KEGG:hsa:84456 | NA |
| DN133_c1_g1_i1 | LRWD1 | Leucine-rich repeat and WD repeat-containing protein 1; | KEGG:xtr:100145159 | NA |
| DN133_c1_g1_i1 | LRWD1 | Leucine-rich repeat and WD repeat-containing protein 1; | KEGG:xla:444544 | NA |
| DN21599_c0_g1_i6 | MNS1 | Meiosis-specific nuclear structural protein 1; | KEGG:mmu:17427 | NA |
| DN9768_c0_g1_i7 | CCNE2 | G1/S-specific cyclin-E2; | KEGG:hsa:9134 | KO:K06626 |
| DN9743_c0_g1_i21 | CXXC1 | CXXC-type zinc finger protein 1; | KEGG:bta:511446 | KO:K14960 |
| DN9743_c0_g1_i21 | CXXC1 | CXXC-type zinc finger protein 1; | KEGG:hsa:30827 | KO:K14960 |
| DN3018_c0_g1_i18 | KHK | Ketohexokinase {ECO:0000312|RGD:2966}; | KEGG:hsa:3795 | KO:K00846 |
| DN3018_c0_g1_i18 | KHK | Ketohexokinase {ECO:0000312|RGD:2966}; | KEGG:rno:25659 | KO:K00846 |
| DN3018_c0_g1_i18 | KHK | Ketohexokinase {ECO:0000312|RGD:2966}; | KEGG:hsa:284001 | NA |
| DN3044_c1_g1_i4 | ANR26 | Ankyrin repeat domain-containing protein 26 {ECO:0000305}; | KEGG:hsa:22852 | NA |
| DN582_c0_g1_i1 | RAD21 | Double-strand-break repair protein rad21 homolog; | KEGG:bta:540966 | KO:K06670 |
| DN2129_c0_g1_i12 | OGFD2 | 2-oxoglutarate and iron-dependent oxygenase domain-containing protein 2; | KEGG:dre:790923 | NA |
| DN10956_c0_g1_i1 | MRC1 | Macrophage mannose receptor 1; | KEGG:hsa:4360 | KO:K06560 |
| DN1264_c0_g1_i2 | REEP2 | Receptor expression-enhancing protein 2; | KEGG:hsa:51308 | KO:K17338 |
| DN1264_c0_g1_i2 | REEP2 | Receptor expression-enhancing protein 2; | KEGG:mmu:225362 | KO:K17338 |
| DN1230_c0_g1_i13 | CFLAR | CASP8 and FADD-like apoptosis regulator; | KEGG:hsa:8837 | KO:K04724 |
| DN6130_c0_g1_i8 | GCN1 | eIF-2-alpha kinase activator GCN1 {ECO:0000250|UniProtKB:E9PVA8}; | KEGG:hsa:10985 | NA |
| DN3358_c1_g1_i14 | TRPS1 | Zinc finger transcription factor Trps1; | KEGG:hsa:7227 | KO:K22040 |
| DN7377_c0_g1_i8 | ATAD3 | ATPase family AAA domain-containing protein 3; | KEGG:bta:784353 | KO:K17681 |
| DN7377_c0_g1_i8 | ATAD3 | ATPase family AAA domain-containing protein 3; | KEGG:xla:398759 | KO:K17681 |
| DN7377_c0_g1_i8 | ATAD3 | ATPase family AAA domain-containing protein 3; | KEGG:xtr:407895 | KO:K17681 |
| DN6423_c0_g1_i4 | IFT74 | Intraflagellar transport protein 74 homolog; | KEGG:mmu:67694 | KO:K19679 |
| DN10599_c0_g1_i1 | PEDF | Pigment epithelium-derived factor; | KEGG:mmu:20317 | KO:K19614 |
| DN1044_c0_g1_i3 | ZN318 | Zinc finger protein 318; | KEGG:hsa:24149 | NA |
| DN327_c0_g1_i6 | MSD2 | Myb/SANT-like DNA-binding domain-containing protein 2; | KEGG:gga:768763 | NA |
| DN308_c0_g1_i5 | ZCHC2 | Zinc finger CCHC domain-containing protein 2; | KEGG:hsa:54877 | KO:K22700 |
| DN5860_c0_g1_i2 | PACR | Pituitary adenylate cyclase-activating polypeptide type I receptor; | KEGG:bta:319095 | KO:K04587 |
| DN4911_c0_g1_i8 | SETMR | Histone-lysine N-methyltransferase SETMAR {ECO:0000305}; | KEGG:mmu:74729 | KO:K11433 |
| DN5091_c0_g2_i6 | ATL2 | ADAMTS-like protein 2; | KEGG:hsa:9719 | NA |
| DN745_c0_g1_i22 | SEPT7 | Septin-7; | KEGG:pon:100173884 | KO:K16944 |
| DN9837_c0_g1_i9 | HOP2 | Homologous-pairing protein 2 homolog; | KEGG:hsa:29893 | KO:K06695 |
| DN2291_c0_g1_i9 | MCAT | Mitochondrial carnitine/acylcarnitine carrier protein; | KEGG:mcf:102124681 | KO:K15109 |
| DN2291_c0_g1_i9 | MCAT | Mitochondrial carnitine/acylcarnitine carrier protein; | KEGG:mmu:57279 | KO:K15109 |
| DN9334_c0_g1_i4 | CARM1 | Histone-arginine methyltransferase CARM1; | KEGG:hsa:10498 | KO:K05931 |
| DN8480_c0_g2_i2 | ARI5B | AT-rich interactive domain-containing protein 5B; | KEGG:gga:423661 | NA |
| DN130272_c0_g3_i2 | CPTP | Ceramide-1-phosphate transfer protein; | KEGG:xla:735049 | NA |
topGO.mappings <- readMappings("G:/Shared drives/RNA Seq Supershedder Project/BWTE DE manuscript/extData/GOdbTrans.txt", sep = "\t", IDsep = ",")
goEnrich <- function(targetComp, ...) {
DE.results <- dt.tib %>% filter(!!sym(targetComp) != 0)
all.genes <- sort(unique(as.character(rownames(dge$counts))))
int.genes <- DE.results$transcript
int.genes <- factor(as.integer(all.genes %in% int.genes))
names(int.genes) = all.genes
go.obj.BP <- new("topGOdata", ontology='BP',
allGenes = int.genes,
annot = annFUN.gene2GO,
nodeSize = 5,
gene2GO = topGO.mappings)
go.obj.MF <- new("topGOdata", ontology='MF',
allGenes = int.genes,
annot = annFUN.gene2GO,
nodeSize = 5,
gene2GO = topGO.mappings)
go.obj.CC <- new("topGOdata", ontology='CC',
allGenes = int.genes,
annot = annFUN.gene2GO,
nodeSize = 5,
gene2GO = topGO.mappings)
results.BP <- runTest(go.obj.BP, algorithm = "elim", statistic = "fisher")
results.tab.BP <- GenTable(object = go.obj.BP,
elimFisher = results.BP,
topNodes = 50) %>%
as_tibble() %>%
mutate(pVal = as.numeric(elimFisher)) %>%
filter(pVal < 0.01) %>%
mutate(Domain = "BP") %>%
mutate(Comparison = targetComp)
results.MF <- runTest(go.obj.MF, algorithm = "elim", statistic = "fisher")
results.tab.MF <- GenTable(object = go.obj.MF,
elimFisher = results.MF,
topNodes = 50) %>%
as_tibble() %>%
mutate(pVal = as.numeric(elimFisher)) %>%
filter(pVal < 0.01) %>%
mutate(Domain = "MF") %>%
mutate(Comparison = targetComp)
results.CC <- runTest(go.obj.CC, algorithm = "elim", statistic = "fisher")
results.tab.CC <- GenTable(object = go.obj.CC,
elimFisher = results.CC,
topNodes = 50) %>%
as_tibble() %>%
mutate(pVal = as.numeric(elimFisher)) %>%
filter(pVal < 0.01) %>%
mutate(Domain = "CC") %>%
mutate(Comparison = targetComp)
rbind(results.tab.BP,
results.tab.MF,
results.tab.CC)
}
results <- list()
for(z in unique(names(dt.tib)[-1])) {
if (dt.tib %>%
filter(!!sym(z) != 0) %>%
nrow(.) > 0)
results[[length(results)+1]] <- goEnrich(z)
}
results.tib <- bind_rows(results, .id = "column_label") %>%
select(-column_label) %>%
arrange(GO.ID, Comparison)
kable(results.tib) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| GO.ID | Term | Annotated | Significant | Expected | elimFisher | pVal | Domain | Comparison |
|---|---|---|---|---|---|---|---|---|
| GO:0000002 | mitochondrial genome maintenance | 87 | 2 | 0.14 | 0.00927 | 0.009270 | BP | LvH.I5 |
| GO:0000262 | mitochondrial chromosome | 9 | 1 | 0.01 | 0.0059 | 0.005900 | CC | MvH.I5 |
| GO:0000333 | telomerase catalytic core complex | 5 | 1 | 0.01 | 0.0081 | 0.008100 | CC | LvH.I5 |
| GO:0000495 | box H/ACA snoRNA 3’-end processing | 14 | 2 | 0.02 | 0.00024 | 0.000240 | BP | LvH.I5 |
| GO:0000794 | condensed nuclear chromosome | 259 | 3 | 0.42 | 0.0088 | 0.008800 | CC | LvH.I5 |
| GO:0003420 | regulation of growth plate cartilage cho… | 14 | 1 | 0.01 | 0.0098 | 0.009800 | BP | MvH.I5 |
| GO:0003721 | telomerase RNA reverse transcriptase act… | 5 | 1 | 0.01 | 0.0085 | 0.008500 | MF | LvH.I5 |
| GO:0003887 | DNA-directed DNA polymerase activity | 212 | 2 | 0.15 | 0.0097 | 0.009700 | MF | MvH.I5 |
| GO:0004454 | ketohexokinase activity | 9 | 1 | 0.01 | 0.0063 | 0.006300 | MF | MvH.I5 |
| GO:0004535 | poly(A)-specific ribonuclease activity | 40 | 2 | 0.07 | 0.0021 | 0.002100 | MF | LvH.I5 |
| GO:0004999 | vasoactive intestinal polypeptide recept… | 8 | 1 | 0.01 | 0.0056 | 0.005600 | MF | MvH.I5 |
| GO:0005087 | Ran guanyl-nucleotide exchange factor ac… | 8 | 1 | 0.01 | 0.0056 | 0.005600 | MF | MvH.I5 |
| GO:0005721 | pericentric heterochromatin | 65 | 2 | 0.11 | 0.0051 | 0.005100 | CC | LvH.I5 |
| GO:0007004 | telomere maintenance via telomerase | 255 | 3 | 0.42 | 0.00886 | 0.008860 | BP | LvH.I5 |
| GO:0007354 | zygotic determination of anterior/poster… | 5 | 1 | 0.01 | 0.00827 | 0.008270 | BP | LvH.I5 |
| GO:0007568 | aging | 740 | 5 | 1.23 | 0.00776 | 0.007760 | BP | LvH.I5 |
| GO:0008013 | beta-catenin binding | 260 | 3 | 0.44 | 0.0099 | 0.009900 | MF | LvH.I5 |
| GO:0008327 | methyl-CpG binding | 50 | 2 | 0.08 | 0.0033 | 0.003300 | MF | LvH.I5 |
| GO:0015786 | UDP-glucose transmembrane transport | 12 | 1 | 0.01 | 0.0084 | 0.008400 | BP | MvH.I5 |
| GO:0016571 | histone methylation | 495 | 4 | 0.82 | 0.00933 | 0.009330 | BP | LvH.I5 |
| GO:0016571 | histone methylation | 495 | 3 | 0.35 | 0.0049 | 0.004900 | BP | MvH.I5 |
| GO:0016823 | hydrolase activity, acting on acid carbo… | 6 | 1 | 0.00 | 0.0042 | 0.004200 | MF | MvH.I5 |
| GO:0019441 | tryptophan catabolic process to kynureni… | 10 | 1 | 0.01 | 0.0070 | 0.007000 | BP | MvH.I5 |
| GO:0019442 | tryptophan catabolic process to acetyl-C… | 6 | 1 | 0.01 | 0.00991 | 0.009910 | BP | LvH.I5 |
| GO:0019442 | tryptophan catabolic process to acetyl-C… | 6 | 1 | 0.00 | 0.0042 | 0.004200 | BP | MvH.I5 |
| GO:0019805 | quinolinate biosynthetic process | 8 | 1 | 0.01 | 0.0056 | 0.005600 | BP | MvH.I5 |
| GO:0022904 | respiratory electron transport chain | 202 | 2 | 0.14 | 0.0088 | 0.008800 | BP | MvH.I5 |
| GO:0030374 | nuclear receptor transcription coactivat… | 238 | 3 | 0.40 | 0.0078 | 0.007800 | MF | LvH.I5 |
| GO:0031514 | motile cilium | 261 | 3 | 0.42 | 0.0089 | 0.008900 | CC | LvH.I5 |
| GO:0032902 | nerve growth factor production | 6 | 1 | 0.01 | 0.00991 | 0.009910 | BP | LvH.I5 |
| GO:0034354 | ‘de novo’ NAD biosynthetic process from … | 10 | 1 | 0.01 | 0.0070 | 0.007000 | BP | MvH.I5 |
| GO:0034645 | cellular macromolecule biosynthetic proc… | 11790 | 31 | 19.56 | 0.00835 | 0.008350 | BP | LvH.I5 |
| GO:0043199 | sulfate binding | 14 | 1 | 0.01 | 0.0098 | 0.009800 | MF | MvH.I5 |
| GO:0043420 | anthranilate metabolic process | 6 | 1 | 0.01 | 0.00991 | 0.009910 | BP | LvH.I5 |
| GO:0043420 | anthranilate metabolic process | 6 | 1 | 0.00 | 0.0042 | 0.004200 | BP | MvH.I5 |
| GO:0044030 | regulation of DNA methylation | 48 | 2 | 0.08 | 0.00291 | 0.002910 | BP | LvH.I5 |
| GO:0051091 | positive regulation of DNA-binding trans… | 607 | 3 | 0.43 | 0.0087 | 0.008700 | BP | MvH.I5 |
| GO:0051321 | meiotic cell cycle | 659 | 6 | 1.09 | 0.00080 | 0.000800 | BP | LvH.I5 |
| GO:0061035 | regulation of cartilage development | 185 | 3 | 0.31 | 0.00366 | 0.003660 | BP | LvH.I5 |
| GO:0061515 | myeloid cell development | 206 | 3 | 0.34 | 0.00494 | 0.004940 | BP | LvH.I5 |
| GO:0061624 | fructose catabolic process to hydroxyace… | 13 | 1 | 0.01 | 0.0091 | 0.009100 | BP | MvH.I5 |
| GO:0070034 | telomerase RNA binding | 81 | 2 | 0.14 | 0.0084 | 0.008400 | MF | LvH.I5 |
| GO:0071168 | protein localization to chromatin | 99 | 3 | 0.16 | 0.00061 | 0.000610 | BP | LvH.I5 |
| GO:0071168 | protein localization to chromatin | 99 | 3 | 0.07 | 4.7e-05 | 0.000047 | BP | MvH.I5 |
| GO:0071279 | cellular response to cobalt ion | 12 | 1 | 0.01 | 0.0084 | 0.008400 | BP | MvH.I5 |
| GO:0071549 | cellular response to dexamethasone stimu… | 88 | 2 | 0.15 | 0.00947 | 0.009470 | BP | LvH.I5 |
| GO:0097053 | L-kynurenine catabolic process | 9 | 1 | 0.01 | 0.0063 | 0.006300 | BP | MvH.I5 |
| GO:0097750 | endosome membrane tubulation | 6 | 1 | 0.01 | 0.00991 | 0.009910 | BP | LvH.I5 |
| GO:0140285 | endosome fission | 6 | 1 | 0.01 | 0.00991 | 0.009910 | BP | LvH.I5 |
| GO:1903506 | regulation of nucleic acid-templated tra… | 7535 | 21 | 12.50 | 0.00909 | 0.009090 | BP | LvH.I5 |